library(dplyr)
library(tidyr)
library(readr)
library(stringr)
library(ggplot2)
library(ggrepel)
library(topr)
library(UpSetR)
library(plyranges)
library(ieugwasr)
library(gwascat)
# Source helper functions
source("multi_funcs.R")

Principal components

plot_ancestry_PCs("antidep-2501-mrmega-N06A-DIV.log")

plot_ancestry_PCs("antidep-2501-mrmega-N06AA-DIV.log")

plot_ancestry_PCs("antidep-2501-mrmega-N06AB-DIV.log")

Manhattan plots

mrmega_n06a <- get_sumstats("antidep-2501-mrmega-N06A-DIV.gz")
## Rows: 10650849 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (5): MarkerName, EA, NEA, Effects, CHR
## dbl (23): Chromosome, Position, EAF, Nsample, Ncohort, beta_0, se_0, beta_1,...
## lgl  (1): Comments
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mrmega_n06aa <- get_sumstats("antidep-2501-mrmega-N06AA-DIV.gz")
## Rows: 9007284 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (5): MarkerName, EA, NEA, Effects, CHR
## dbl (23): Chromosome, Position, EAF, Nsample, Ncohort, beta_0, se_0, beta_1,...
## lgl  (1): Comments
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mrmega_n06ab <- get_sumstats("antidep-2501-mrmega-N06AB-DIV.gz")
## Rows: 10033491 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (5): MarkerName, EA, NEA, Effects, CHR
## dbl (23): Chromosome, Position, EAF, Nsample, Ncohort, beta_0, se_0, beta_1,...
## lgl  (1): Comments
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mrmega_n06a_assoc <- get_assoc(mrmega_n06a)
mrmega_n06aa_assoc <- get_assoc(mrmega_n06aa)
mrmega_n06ab_assoc <- get_assoc(mrmega_n06ab)
plot_manhat(mrmega_n06a_assoc)

plot_manhat(mrmega_n06aa_assoc)

plot_manhat(mrmega_n06ab_assoc)

Clumps

Get clumps for all phenotypes and merge together.

clumps_N06A <- get_clumps("antidep-2501-mrmega-N06A-DIV.clumps.tsv")
clumps_N06AA <- get_clumps("antidep-2501-mrmega-N06AA-DIV.clumps.tsv")
clumps_N06AB <- get_clumps("antidep-2501-mrmega-N06AB-DIV.clumps.tsv")

clumps_N06A
clumps_N06AA
clumps_N06AB
clumps <- bind_rows(list("N06A-DIV" = clumps_N06A,
                         "N06AA-DIV" = clumps_N06AA,
                         "N06AB-DIV" = clumps_N06AB),
                    .id = "dataset")

Write out table and column descriptions

clumps_table_basename <- str_c("clumps_mrmega_", "antidep-2501", ".clumps.csv")

clumps_colname_descriptions <-
c("dataset" = "meta-analysis dataset (phenotype-cluster)",
"Locus" = "locus number from clumping analysis",
"MarkerName" = "variant identifier",
"Chromosome" = "chromosome (numeric)",
"Position" = "base position in hg38",
"EA" = "effect allele",
"NEA" = "non-effect allele",
"EAF" = "frequency of effect allele",
"Nsample" = "effective sample size",
"Ncohort" = "number of cohorts",
"Effects" = "direction of effects",
"beta_0" = "effect of first PC of meta regression", 
"se_0" = "standard error of beta_0",
"beta_1" = "effect of second PC of meta regression",
"se_1" = "standard error of beta_1",
"beta_2" = "effect of third PC of meta regression",
"se_2" = "standard error of beta_2",
"beta_3" = "effect of fourth PC of meta regression",
"se_3" = "standard error of beta_3", 
"chisq_association" = "assocation test statistics",
"ndf_association" = "association degrees of freedom",
"P" = "association p-value",
"chisq_ancestry_het" = "ancestry heterogeneity test statistic", 
"ndf_ancestry_het" = "ancestry heterogeneity degress of freedom",
"P-value_ancestry_het" = "ancestry heterogeneity p-value",
"chisq_residual_het" = "residual heterogeneity test statistic", 
"ndf_residual_het" = "residual heterogeneity degress of freedom",
"P-value_residual_het" = "residual heterogeneity p-value",
"lnBF" = "log of Bayes Factor",
"Comments" = "Reason if marker was not analyzed", 
"CHR" = "chromosome (character)",
"start" = "start position of the locus",
"end" = "end position of the locus")

clumps_colname_descriptions_table <- tibble(column = names(clumps_colname_descriptions), description = clumps_colname_descriptions)

# check that all names match
if(any(names(clumps) != names(clumps_colname_descriptions))) {
    stop(str_glue("column names in {clumps_table_basename}are not all in colname_descriptions"))
}

write_csv(clumps, here::here("manuscript", "tables", clumps_table_basename))
write_csv(clumps_colname_descriptions_table, here::here("manuscript", "tables", str_glue("{clumps_table_basename}.cols")))

Catalogue

Lookup results in GWAS Catalog

gwcat <- get_cached_gwascat()

gwascat_N06A_table <- look_up_snps(clumps_N06A, gwcat)
gwascat_N06AA_table <- look_up_snps(clumps_N06AA, gwcat)
gwascat_N06AB_table <- look_up_snps(clumps_N06AB, gwcat)

Write out table and column descriptions

gwascat_basename <- "gwascat_mrmega_antidep-2501.csv"
gwascat_table <- bind_rows(list("N06A-DIV" = gwascat_N06A_table,
                         "N06AA-DIV" = gwascat_N06AA_table,
                         "N06AB-DIV" = gwascat_N06AB_table),
                    .id = "dataset")

gwascat_colname_descriptions <- 
c("dataset" = "meta-analysis dataset (phenotype-cluster)",
  "DISEASE/TRAIT" = "name of disease or trait phenotype",
  "SNP" = "variant identifier"
)

gwascat_colname_descriptions_table <- tibble(column = names(gwascat_colname_descriptions), description = gwascat_colname_descriptions)

# check that all names match
if(any(names(gwascat_table) != names(gwascat_colname_descriptions))) {
    stop(str_glue("column names in {gwascat_basename} are not all in colname_descriptions"))
}

write_csv(gwascat_table, here::here("manuscript", "tables", gwascat_basename))
write_csv(gwascat_colname_descriptions_table, here::here("manuscript", "tables", str_glue("{gwascat_basename}.cols")))